library(tidyverse)
library(haven)
library(sjlabelled)
library(RColorBrewer)
library(survey)
library(gridExtra)
library(data.table)
library(ggeffects)
library(stargazer)
library(weights)


# early cleaning ----------------------------------------------------------


## loading 

# Read the raw Stata export, drop the business questions (q3*) and the earlier
# question block q8..q22_c, then give the q5/q6/q7 items readable
# <country>_<topic> names.
full <- read_dta("~/Dropbox (MIT)/antisemitism/data/TUFT0009_OUTPUT.DTA") %>%
  dplyr::select(!starts_with("q3")) %>%
  dplyr::select(!q8:q22_c) %>%
  rename(
    # q5_*: culture items
    israel_culture = q5_a,
    mexico_culture = q5_b,
    india_culture = q5_c,
    china_culture = q5_d,
    nigeria_culture = q5_e,
    iran_culture = q5_f,
    russia_culture = q5_g,
    # q6_*: politics items
    israel_politics = q6_a,
    mexico_politics = q6_b,
    india_politics = q6_c,
    china_politics = q6_d,
    nigeria_politics = q6_e,
    iran_politics = q6_f,
    russia_politics = q6_g,
    # q7_*: overall items
    israel_overall = q7_a,
    mexico_overall = q7_b,
    india_overall = q7_c,
    china_overall = q7_d,
    nigeria_overall = q7_e,
    iran_overall = q7_f,
    russia_overall = q7_g
  )


# analysis ----------------------------------------------------------------


## calculate mean favorability for each age x ideology x country x condition --------

# Work on a copy so `full` keeps its labelled, un-recoded values.
data <- full

# Column positions of the favorability items, from israel_culture through
# russia_overall (assumes these columns sit next to each other in `full`).
fav_cols <- match("israel_culture", colnames(data)):match("russia_overall", colnames(data))

# Strip labels, then recode: codes below 3 -> 1 (favorable), codes 3-4 -> 0
# (unfavorable); anything else (5 and above, or missing) becomes NA.
# `everything()` is spelled out because calling across() without `.cols` is
# deprecated in recent dplyr.
data[, fav_cols] <- data[, fav_cols] %>%
  remove_all_labels() %>%
  mutate(across(everything(), ~ case_when(.x < 3 ~ 1, .x < 5 ~ 0)))

# Strip the labels from every remaining column as well.
data <- remove_all_labels(data)

# Collapse the 7-point ideology scale into three groups, ordered
# conservative < moderate < liberal.
data <- data %>%
  mutate(
    ideo = factor(
      case_when(
        ideo7 < 4 ~ "liberal",
        ideo7 > 4 ~ "conservative",
        ideo7 == 4 ~ "moderate"
      ),
      levels = c("conservative", "moderate", "liberal")
    )
  )


# Keep only the identifiers, weights, ideology measures and favorability items.
df_means <- data %>%
  dplyr::select(
    caseid, age, ideo7, weight_age30, weight_overall,
    ends_with("culture"), ends_with("overall"), ends_with("politics"),
    ideo
  )

## apply survey weights

# Under-30 design: respondents that carry a weight_age30 value.
under30_weighted <- svydesign(
  ids = ~1, weights = ~weight_age30,
  data = df_means[!is.na(df_means$weight_age30), ]
)

# Over-30 design: weight everyone with weight_overall, then keep age > 30.
over30_weighted <- subset(
  svydesign(ids = ~1, weights = ~weight_overall, data = df_means),
  age > 30
)

# Favorability variables to summarise, picked by name rather than by column
# position so a change in column order cannot silently select the wrong ones.
# weight_overall also ends in "_overall" and has to be excluded.
variables <- setdiff(
  grep("_(culture|overall|politics)$", colnames(df_means), value = TRUE),
  "weight_overall"
)

# 7 countries x 3 question types were renamed above; fail loudly otherwise.
stopifnot(length(variables) == 21L)

# Weighted mean of every variable in `vars` within each level of `by_var`.
#
#   design    survey design object to estimate from
#   vars      character vector of "<country>_<type>" column names
#   by_var    name of the grouping column, e.g. "ideo7" or "ideo"
#   vartype   forwarded to svyby() as `vartype`
#   col_names names given to each per-variable result before stacking
#   age_label value written to the `age` column of the result
#
# Returns one data frame with the per-variable results stacked, the variable
# name split into `country` and `type`, `type` as a factor ordered
# overall/politics/culture, and an `age` column.
favorability_means <- function(design, vars, by_var, vartype, col_names, age_label) {
  by_formula <- as.formula(paste0("~", by_var))

  per_variable <- lapply(vars, function(v) {
    est <- svyby(as.formula(paste0("~", v)),
                 by = by_formula, design,
                 svymean, na.rm = TRUE, vartype = vartype)
    est <- cbind(est, variable = v)
    # split "<country>_<type>" into two columns
    est <- separate(est, variable, into = c("country", "type"), sep = "_")
    # uniform column names so the pieces can be row-bound
    setNames(est, col_names)
  })

  out <- do.call(rbind, per_variable)
  out$type <- factor(out$type, levels = c("overall", "politics", "culture"))
  out$age <- age_label
  out
}

# compare to make sure the code is right
svyby(~israel_overall, ~ideo7, under30_weighted, svymean, na.rm = TRUE, vartype = c("ci", "se"))

# Means by 7-point ideology, with standard errors and confidence intervals.
ideo7_cols <- c("ideo7", "mean", "se", "ci_l", "ci_u", "country", "type")

means <- rbind(
  favorability_means(under30_weighted, variables, "ideo7", c("ci", "se"),
                     ideo7_cols, "under30"),
  favorability_means(over30_weighted, variables, "ideo7", c("ci", "se"),
                     ideo7_cols, "over30")
)

# make sure ideology is numeric
means$ideo7 <- as.numeric(means$ideo7)

# remove old dataframe
rm(df_means)




## calculate mean favorability with ideo3 ----------------------------------

### data means by big ideo group

# Means by the three-group ideology factor, with confidence intervals only.
ideo3_cols <- c("ideo", "mean", "ci_l", "ci_u", "country", "type")

means_id3 <- rbind(
  favorability_means(under30_weighted, variables, "ideo", "ci",
                     ideo3_cols, "under30"),
  favorability_means(over30_weighted, variables, "ideo", "ci",
                     ideo3_cols, "over30")
)


## calculate favorability with levels of support  ------------------------------------------------------------

# Unlabelled copy of the survey that keeps the original rating codes
# (no favorable/unfavorable recode), restricted to the columns needed here.
data_2 <- full %>%
  remove_all_labels() %>%
  dplyr::select(
    caseid, age, ideo7, weight_age30, weight_overall,
    ends_with("culture"), ends_with("overall"), ends_with("politics")
  )

# Under-30 design: respondents that carry a weight_age30 value.
under30_levels <- svydesign(
  ids = ~1, weights = ~weight_age30,
  data = data_2[!is.na(data_2$weight_age30), ]
)

# Over-30 design: weight with weight_overall, then keep age > 30.
over30_levels <- subset(
  svydesign(ids = ~1, weights = ~weight_overall, data = data_2),
  age > 30
)

# only interested in young, very liberal
young_left <- subset(under30_levels, ideo7 == "1")

# One weighted rating table per country (scaled to total 100), stacked with a
# `country` column and the rating column renamed to a common `rating`.
young_left_fav <- do.call(
  rbind,
  lapply(
    c("china", "israel", "nigeria", "india", "mexico", "russia", "iran"),
    function(cty) {
      rating_col <- paste0(cty, "_overall")
      tab <- svytable(as.formula(paste0("~ideo7 + ", rating_col)),
                      design = young_left, Ntotal = 100)
      cbind(as.data.frame(tab), country = cty) %>%
        rename(rating = all_of(rating_col))
    }
  )
)

# Display order of countries.
young_left_fav$country <- young_left_fav$country %>%
  str_to_sentence() %>%
  factor(levels = c("Russia", "Iran", "China",
                    "Israel", "Nigeria", "India", "Mexico"))

# Display order of ratings: code 5 sits between the favorable (1, 2) and
# unfavorable (3, 4) codes.
young_left_fav$rating <- young_left_fav$rating %>%
  factor(levels = c("1", "2", "5", "3", "4"))





## issue rankings  -------------------------------------------------------

# State-level Jewish population share, used for the eventual regression.
pct_jew_pop <- read_dta("~/Dropbox (MIT)/antisemitism/data/pct_jew_pop.dta")

# Build the issue-ranking table: keep the non-question columns plus q4*, q2*
# and q1, drop the country favorability items (re-adding weight_overall, which
# the "overall" suffix would otherwise remove), drop caseid, and give the
# issue-ranking (q4_*) and identity (q2_mult_*) columns readable names.
df_issue <- data %>%
  dplyr::select(-starts_with("q"), starts_with("q4"), starts_with("q2"), q1) %>%
  dplyr::select(-ends_with(c("overall", "culture", "politics")), weight_overall) %>%
  dplyr::select(-caseid, birthyr, starttime, endtime) %>%
  rename(
    abortion = q4_1,
    racial_eq = q4_2,
    health = q4_3,
    israel_palest = q4_4,
    immigration = q4_5,
    firearms = q4_6,
    economics = q4_7,
    enviro = q4_8,
    russian_int = q4_9,
    natsec = q4_10,
    china_hr = q4_11,
    socialist = q2_mult_1,
    leftist = q2_mult_2,
    progressive = q2_mult_3,
    libertarian = q2_mult_4,
    altright = q2_mult_5,
    chcon = q2_mult_6
  )

# Keep only respondents who ranked every issue. setDT() converts df_issue to a
# data.table in place so that na.omit() accepts the `cols` argument.
df_issue_full <- na.omit(
  setDT(df_issue),
  cols = c(
    "abortion", "racial_eq", "health", "israel_palest",
    "immigration", "firearms", "economics", "enviro",
    "russian_int", "natsec", "china_hr"
  )
)

# Look up each respondent's state in pct_jew_pop and attach its pct_jewish.
df_issue_full$pct_jew <- pct_jew_pop$pct_jewish[
  match(df_issue_full$inputstate, pct_jew_pop$inputstate)
]


# turn into a plain data frame before adding the derived columns
df_issue_full <- as.data.frame(df_issue_full)

# College attendance from q1: codes 2 and 3 -> "4-year college", code 1 -> "no",
# any other non-missing code -> "Other college"; missing stays NA.
df_issue_full$college <- case_when(
  df_issue_full$q1 == 2 | df_issue_full$q1 == 3 ~ "4-year college",
  df_issue_full$q1 == 1 ~ "no",
  !is.na(df_issue_full$q1) ~ "Other college"
)

# Simplified race: codes 1-3 get their own label, everything else (including
# missing) is "Other race".
df_issue_full$race_simp <- case_when(
  df_issue_full$race == 1 ~ "White",
  df_issue_full$race == 2 ~ "Black",
  df_issue_full$race == 3 ~ "Latino",
  TRUE ~ "Other race"
)

# Identity columns become 1 if the respondent chose that identity and 0 if not
# (missing stays NA). The columns are addressed by name rather than by the
# positions 38:43, so adding or reordering columns upstream cannot silently
# recode the wrong variables.
# NOTE(review): assumes positions 38:43 held exactly these six renamed
# q2_mult_* columns -- confirm against the data.
identity_cols <- c("socialist", "leftist", "progressive",
                   "libertarian", "altright", "chcon")

df_issue_full[identity_cols] <- lapply(
  df_issue_full[identity_cols],
  function(x) ifelse(x == 1, 1, 0)
)


# Under-30 design: respondents that carry a weight_age30 value.
under30_issue <- svydesign(
  ids = ~1, weights = ~weight_age30,
  data = df_issue_full[!is.na(df_issue_full$weight_age30), ]
)

# Over-30 design: weight with weight_overall, then keep age > 30.
over30_issue <- subset(
  svydesign(ids = ~1, weights = ~weight_overall, data = df_issue_full),
  age > 30
)

# Issues whose mean rank is computed below.
issues <- c("abortion", "racial_eq", "health", "israel_palest",
            "immigration", "firearms", "economics", "enviro",
            "russian_int", "natsec", "china_hr")

# For each design: weighted mean rank of every issue by ideo7 (with CI), given
# uniform column names, stacked, ideology coerced to numeric and tagged with
# the age group. The two age groups are then stacked into one data frame.
iss_means <- do.call(
  rbind,
  Map(
    function(design, age_label) {
      stacked <- do.call(rbind, lapply(issues, function(iss) {
        est <- svyby(as.formula(paste0("~", iss)),
                     by = ~ideo7, design, svymean, na.rm = TRUE, vartype = "ci")
        setNames(cbind(est, issue = iss),
                 c("ideo7", "mean", "ci_l", "ci_u", "issue"))
      }))
      stacked$ideo7 <- as.numeric(stacked$ideo7)
      stacked$age <- age_label
      stacked
    },
    list(under30_issue, over30_issue),
    c("under30", "over30")
  )
)

# df_issue is no longer needed.
rm(df_issue)


## rates of answering "i don't know" -------------------------------------------------------

# Weighted distribution (scaled to total 100) of every rating code for every
# favorability variable among under-30 respondents, stacked into one data
# frame with the variable name in `var` and the rating code in `rating`.
# Built with lapply + one rbind instead of growing a data frame in a loop;
# all_of() makes it explicit that the column name comes from the string `v`.
dk_rates <- do.call(rbind, lapply(variables, function(v) {
  tab <- svytable(as.formula(paste0("~", v)), design = under30_levels, Ntotal = 100)
  cbind(as.data.frame(tab), var = v) %>%
    rename(rating = all_of(v))
}))

# Table of the share giving rating code 5, one row per country and one column
# per question type. xtable is not attached at the top of the script, so it is
# called through its namespace.
dk_rates %>%
  separate(var, into = c("country", "type"), sep = "_") %>%
  filter(rating == 5) %>%
  dplyr::select(!rating) %>%
  pivot_wider(names_from = "type", values_from = Freq) %>%
  relocate(country, overall, politics, culture) %>%
  arrange(factor(country, levels = c("russia", "iran", "china",
                                     "israel", "nigeria", "india", "mexico"))) %>%
  xtable::xtable()
  



## p-values difference of means --------------------------------------------

## t-test

library(weights)

### fig2 --------------------------------------------

# For each ideo7 level, weighted t-test of israel_overall between under-30
# respondents (those with a weight_age30, weighted by it) and respondents with
# age > 30 (weighted by weight_overall). Column 1 is the ideology level and
# column 2 the third element of the test's `coefficients`, used below as the
# p-value. Columns are named explicitly instead of inheriting names from the
# first row's values, and the frame is built in one go rather than grown.
out_fig2 <- data.frame(
  ideo7 = 1:7,
  p = vapply(1:7, function(i) {
    temp_u30 <- data %>% filter(!is.na(weight_age30), ideo7 == i, !is.na(israel_overall))
    temp_o30 <- data %>% filter(age > 30, ideo7 == i, !is.na(israel_overall))

    unname(wtd.t.test(temp_u30$israel_overall, temp_o30$israel_overall,
                      weight = temp_u30$weight_age30,
                      weighty = temp_o30$weight_overall)$coefficients[3])
  }, numeric(1))
)

out_fig2


### fig3 --------------------------------------------

ideologies <- c("liberal", "moderate", "conservative")
countries <- c("russia_overall", "iran_overall", "china_overall",
               "israel_overall", "nigeria_overall", "india_overall",
               "mexico_overall")

# Weighted t-test of one favorability column between under-30 respondents
# (non-missing weight_age30, weighted by it) and respondents with age > 30
# (weighted by weight_overall) within one ideology group. Rows with a missing
# outcome are dropped first. Returns the third element of the test's
# `coefficients`, used below as the p-value.
fig3_p_value <- function(ideo_level, outcome) {
  young <- data %>% filter(!is.na(weight_age30), ideo == ideo_level)
  young <- young[!is.na(young[[outcome]]), ]
  old <- data %>% filter(age > 30, ideo == ideo_level)
  old <- old[!is.na(old[[outcome]]), ]

  unname(wtd.t.test(young[[outcome]], old[[outcome]],
                    weight = young$weight_age30,
                    weighty = old$weight_overall)$coefficients[3])
}

# All ideology x country pairs; country varies fastest so rows are grouped by
# ideology, in the order of `ideologies`. Built in one go instead of growing a
# data frame with rbind inside nested loops.
fig3_grid <- expand.grid(country = countries, ideology = ideologies,
                         stringsAsFactors = FALSE)

out_fig3 <- data.frame(
  ideology = fig3_grid$ideology,
  country = fig3_grid$country,
  p = mapply(fig3_p_value, fig3_grid$ideology, fig3_grid$country,
             USE.NAMES = FALSE)
)

out_fig3 %>% arrange(country) %>% mutate(signif = p < 0.05)


### fig4 --------------------------------------------

# For each ideo7 level, weighted t-test of the israel_palest ranking between
# under-30 respondents (non-missing weight_age30, weighted by it) and
# respondents with age > 30 (weighted by weight_overall). Column 1 is the
# ideology level and column 2 the third element of the test's `coefficients`,
# used below as the p-value. Columns are named explicitly and the frame is
# built in one go rather than grown with rbind.
out_fig4 <- data.frame(
  ideo7 = 1:7,
  p = vapply(1:7, function(i) {
    temp_u30 <- df_issue_full %>% filter(!is.na(weight_age30), ideo7 == i)
    temp_o30 <- df_issue_full %>% filter(age > 30, ideo7 == i)

    unname(wtd.t.test(temp_u30$israel_palest, temp_o30$israel_palest,
                      weight = temp_u30$weight_age30,
                      weighty = temp_o30$weight_overall)$coefficients[3])
  }, numeric(1))
)

out_fig4


### fig5 --------------------------------------------

# Weighted t-test of one Israel favorability column between under-30
# respondents (non-missing weight_age30, weighted by it) and respondents with
# age > 30 (weighted by weight_overall) at one ideo7 level. Rows with a
# missing outcome are dropped first. Returns the third element of the test's
# `coefficients`, used below as the p-value.
fig5_p_value <- function(ideo_level, outcome) {
  young <- data %>% filter(!is.na(weight_age30), ideo7 == ideo_level)
  young <- young[!is.na(young[[outcome]]), ]
  old <- data %>% filter(age > 30, ideo7 == ideo_level)
  old <- old[!is.na(old[[outcome]]), ]

  unname(wtd.t.test(young[[outcome]], old[[outcome]],
                    weight = young$weight_age30,
                    weighty = old$weight_overall)$coefficients[3])
}

# All ideo7 x variable pairs; the variable varies fastest, so the rows run
# overall, culture, politics within each ideology level 1..7. Code further
# down relies on this row order and on the p-value being the third column.
fig5_grid <- expand.grid(
  variable = c("israel_overall", "israel_culture", "israel_politics"),
  i = 1:7,
  stringsAsFactors = FALSE
)

out_fig5 <- data.frame(
  i = fig5_grid$i,
  variable = fig5_grid$variable,
  p = mapply(fig5_p_value, fig5_grid$i, fig5_grid$variable, USE.NAMES = FALSE)
)


# Significance markers for the plot below: "**" for p < 0.05, "*" for p < 0.1,
# blank otherwise. Rows follow out_fig5: for each ideology level 1..7 the
# overall, culture and politics tests in that order. The markers are drawn at
# y = 0 above the matching ideology position. An earlier two-level version of
# this table was overwritten immediately and has been removed.
fig5_signif <- data.frame(
  label = case_when(
    out_fig5[[3]] < 0.05 ~ "**",
    out_fig5[[3]] < 0.1 ~ "*",
    TRUE ~ ""
  ),
  type = rep(c("Overall", "Culture", "Politics"), times = 7),
  y = 0,
  x = rep(as.numeric(1:7), each = 3)
)


# Israel favorability by 7-point ideology and age group, one panel per question
# type, with confidence intervals and the fig5 significance markers. Upper
# confidence limits above 1 are replaced by 0.999 so they fit the 0-100 axis.
means %>%
  filter(country == "israel") %>%
  mutate(
    type = factor(str_to_sentence(type), levels = c("Overall", "Politics", "Culture")),
    ci_u = ifelse(ci_u > 1, 0.999, ci_u)
  ) %>%
  ggplot(aes(x = ideo7)) +
  geom_point(aes(y = mean * 100, color = age),
             position = position_dodge(width = 0.3)) +
  # geom_line(stat="smooth", method = "loess", aes(y = mean*100), se=FALSE, size=0.4, alpha = 1) +
  # the error bars use the same filtered and recoded rows as the points
  geom_errorbar(aes(ymin = ci_l * 100, ymax = 100 * ci_u, color = age),
                width = 0, position = position_dodge(width = 0.3), alpha = 0.5) +
  geom_text(data = fig5_signif,
            mapping = aes(x = x, y = y, label = label)) +
  facet_wrap(~type) +
  scale_x_continuous(breaks = c(2, 4, 6), labels = c("Lib.", "Mod.", "Con.")) +
  scale_color_manual(name = NULL, values = c("#46ACC8", "darkgreen"),
                     labels = c("Ages 31+", "Ages 18-30")) +
  guides(color = guide_legend(reverse = TRUE)) +
  ylim(0, 100) +
  labs(x = "\nRespondent Ideology", y = "Percent favorability\n") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.margin = margin(0, 0, 0, 0),
        legend.box.margin = margin(-7, -7, -7, -7),
        strip.background = element_blank(),
        axis.ticks.y = element_blank())





### fig6 --------------------------------------------


cultures <- c("russia_culture", "iran_culture", "china_culture",
              "nigeria_culture", "india_culture",
              "mexico_culture")

# Weighted t-test, among under-30 respondents at one ideo7 level, of one
# country's culture item against israel_culture. Each side drops its own
# missing values and is weighted by weight_age30. Returns the third element of
# the test's `coefficients`, used later as the p-value.
fig6_p_value <- function(ideo_level, outcome) {
  other <- data %>% filter(!is.na(weight_age30), ideo7 == ideo_level)
  other <- other[!is.na(other[[outcome]]), ]

  israel <- data %>% filter(!is.na(weight_age30),
                            ideo7 == ideo_level, !is.na(israel_culture))

  unname(wtd.t.test(other[[outcome]],
                    israel$israel_culture,
                    weight = other$weight_age30,
                    weighty = israel$weight_age30)$coefficients[3])
}

# All ideology x country pairs; the country varies fastest, so rows are
# grouped by ideology level 1..7. Built in one go instead of growing a data
# frame with rbind inside nested loops.
fig6_grid <- expand.grid(country = cultures, ideology = 1:7,
                         stringsAsFactors = FALSE)

out_fig6 <- data.frame(
  ideology = fig6_grid$ideology,
  country = fig6_grid$country,
  p = mapply(fig6_p_value, fig6_grid$ideology, fig6_grid$country,
             USE.NAMES = FALSE)
)



# figures -------------------------------------------------------


## figure 1 -------------------------------------------------------

# Stacked horizontal bars, one per country, of the four substantive ratings
# among very liberal 18-30 year olds. Rating code 5 is dropped and the
# remaining frequencies are rescaled to sum to 100 within each country.
young_left_fav %>%
  filter(rating != 5) %>%
  group_by(country) %>%
  mutate(freq = Freq * 100 / sum(Freq)) %>%
  ggplot(aes(x = freq, y = country, fill = rating)) +
  geom_col() +
  scale_fill_brewer(
    palette = "YlOrRd",
    name = NULL,
    labels = c("Very favorable", "Somewhat favorable",
               "Somewhat unfavorable", "Very unfavorable"),
    guide = guide_legend(reverse = TRUE)
  ) +
  # reverse the factor order so the first country level is drawn at the top
  scale_y_discrete(limits = rev(levels(as.factor(young_left_fav$country)))) +
  coord_cartesian(xlim = c(0, 100)) +
  labs(x = "\nPercent of very liberal 18-30 year olds answering...", y = NULL) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.line = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(margin = margin(r = 0)),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )



## alternate fig 1 for appendix --------------------------------------------

# Same chart as figure 1 but keeping rating code 5 ("Haven't heard enough to
# say") as a grey segment between the favorable and unfavorable ratings.
young_left_fav %>%
  group_by(country) %>%
  mutate(freq = Freq * 100 / sum(Freq)) %>%
  ggplot(aes(x = freq, y = country, fill = rating)) +
  geom_col() +
  scale_fill_manual(
    name = NULL,
    values = c("#FFFFB2", "#FECC5C", "grey", "#F03B20", "#BD0026"),
    labels = c("Very favorable", "Somewhat favorable",
               "Haven't heard enough to say", "Somewhat unfavorable",
               "Very unfavorable")
  ) +
  # two-row reversed legend; guides() is what determines the fill legend
  guides(fill = guide_legend(nrow = 2, byrow = TRUE, reverse = TRUE)) +
  scale_y_discrete(limits = rev(levels(as.factor(young_left_fav$country)))) +
  coord_cartesian(xlim = c(0, 100)) +
  labs(x = "\nPercent of very liberal 18-30 year olds answering...", y = NULL) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.line = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(margin = margin(r = 0)),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )




## figure 2  -------------------------------------------------------

# Overall favorability toward Israel by 7-point ideology, split by age group,
# with confidence intervals. Stars along the top (y = 100) mark the ideology
# levels where the age-group difference has p < 0.05 ("**") or p < 0.1 ("*").
means %>%
  filter(country == "israel", type == "overall") %>%
  ggplot(aes(x = ideo7, y = mean * 100, color = age)) +
  # geom_line(stat="smooth", method="loess") +
  geom_point(size = 2, position = position_dodge(width = 0.3)) +
  geom_errorbar(aes(ymin = ci_l * 100, ymax = ci_u * 100),
                width = 0, size = 0.3,
                position = position_dodge(width = 0.3)) +
  annotate(
    "text", x = 1:7, y = 100, size = 6,
    label = case_when(
      out_fig2[[2]] < 0.05 ~ "**",
      out_fig2[[2]] < 0.1 ~ "*",
      TRUE ~ ""
    )
  ) +
  scale_x_continuous(breaks = c(2, 4, 6),
                     labels = c("Liberal", "Moderate", "Conservative")) +
  scale_color_manual(name = NULL,
                     values = c("#46ACC8", "darkgreen"),
                     labels = c("Ages 31+", "Ages 18-30"),
                     guide = guide_legend(reverse = TRUE)) +
  ylim(0, 100) +
  labs(title = "",
       x = "\nRespondent Ideology",
       y = "Percentage of respondents viewing Israel favorably") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.margin = margin(0, 0, 0, 0),
        legend.box.margin = margin(-8, -8, -8, -8),
        axis.title.x = element_text(margin = margin(t = -5)))




## figure 3 ----------------------------------------------------------------

out_fig3 <- out_fig3 %>% arrange(country)

# Significance markers for figure 3: "**" for p < 0.05, "*" for p < 0.1.
# The facet (country) and row (ideology) of each marker are taken from the
# same out_fig3 row as its p-value. arrange() sorts the countries
# alphabetically, so a hard-coded Russia/Iran/China/... label vector would
# attach the stars to the wrong countries.
fig3_signif <- data.frame(
  label = case_when(
    out_fig3[[3]] < 0.05 ~ "**",
    out_fig3[[3]] < 0.1 ~ "*",
    TRUE ~ ""
  ),
  # "russia_overall" -> "Russia", matching the facet labels used in the plot
  country = str_to_sentence(sub("_overall$", "", out_fig3$country)),
  x = 0,
  y = out_fig3$ideology
)

# Overall favorability of each country by three-group ideology and age group:
# one facet row per country, points with horizontal confidence intervals, and
# the fig3 significance markers drawn at x = 0.
means_id3 %>%
  filter(type == "overall") %>%
  mutate(
    country = factor(str_to_sentence(country),
                     levels = c("Russia", "Iran", "China", "Israel",
                                "Nigeria", "India", "Mexico")),
    ideo = factor(ideo, levels = c("conservative", "moderate", "liberal"))
  ) %>%
  ggplot() +
  geom_point(aes(x = mean * 100, y = ideo, color = age),
             position = position_dodge(width = 0.6)) +
  geom_errorbarh(aes(y = ideo, xmin = ci_l * 100, xmax = ci_u * 100, color = age),
                 height = 0,
                 position = position_dodge(width = 0.6)) +
  geom_text(data = fig3_signif,
            mapping = aes(x = x, y = y, label = label)) +
  facet_wrap(~country, nrow = 7) +
  scale_y_discrete(labels = c("Conservative", "Moderate", "Liberal")) +
  scale_color_manual(name = NULL,
                     values = c("#46ACC8", "darkgreen"),
                     labels = c("Ages 31+", "Ages 18-30")) +
  guides(color = guide_legend(reverse = TRUE)) +
  xlim(-1, 100) +
  labs(x = "Percentage of respondents\nviewing country favorably", y = NULL) +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.margin = margin(0, 0, 0, 0),
        legend.box.margin = margin(-7, -7, -7, -7),
        strip.background = element_blank(),
        axis.ticks.y = element_blank())



## figure 4 ----------------------------------------------------------------


# Mean rank given to the Israel/Palestine issue by 7-point ideology and age
# group, with confidence intervals. The y axis is reversed so rank 1 is at the
# top; stars at y = 1 mark ideology levels where the age-group difference has
# p < 0.05 ("**") or p < 0.1 ("*").
iss_means %>%
  filter(issue == "israel_palest") %>%
  ggplot() +
  geom_point(aes(x = ideo7, y = mean, color = age),
             position = position_dodge(width = 0.3)) +
  geom_errorbar(aes(x = ideo7, ymin = ci_l, ymax = ci_u, color = age),
                width = 0,
                position = position_dodge(width = 0.3)) +
  annotate(
    "text", x = 1:7, y = 1, size = 6,
    label = case_when(
      out_fig4[[2]] < 0.05 ~ "**",
      out_fig4[[2]] < 0.1 ~ "*",
      TRUE ~ ""
    )
  ) +
  scale_x_continuous(breaks = c(2, 4, 6),
                     labels = c("Liberal", "Moderate", "Conservative")) +
  scale_y_reverse(limits = c(11, 1), breaks = 1:11) +
  scale_color_manual(name = NULL,
                     values = c("#46ACC8", "darkgreen"),
                     labels = c("Ages 31+", "Ages 18-30"),
                     guide = guide_legend(reverse = TRUE)) +
  labs(title = "",
       x = "\nRespondent Ideology",
       y = "Mean Ranking of Israel/Palestine Conflict") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.margin = margin(0, 0, 0, 0),
        legend.box.margin = margin(-8, -8, -8, -8),
        axis.title.x = element_text(margin = margin(t = -5)),
        panel.grid.minor.y = element_blank())




## figure 5 --------------------------------------------------------------



# Significance markers: "**" for p < 0.05, "*" for p < 0.1, blank otherwise.
# Rows follow out_fig5: for each ideology level 1..7 the overall, culture and
# politics tests in that order; markers are drawn at y = 0.
fig5_signif <- data.frame(
  label = case_when(
    out_fig5[[3]] < 0.05 ~ "**",
    out_fig5[[3]] < 0.1 ~ "*",
    TRUE ~ ""
  ),
  type = rep(c("Overall", "Culture", "Politics"), times = 7),
  y = 0,
  x = rep(as.numeric(1:7), each = 3)
)

# Israel favorability by 7-point ideology and age group, one panel per question
# type. Upper confidence limits above 1 are replaced by 0.999 so they fit the
# 0-100 axis.
means %>%
  filter(country == "israel") %>%
  mutate(
    type = factor(str_to_sentence(type), levels = c("Overall", "Politics", "Culture")),
    ci_u = ifelse(ci_u > 1, 0.999, ci_u)
  ) %>%
  ggplot(aes(x = ideo7, color = age)) +
  geom_point(aes(y = mean * 100), position = position_dodge(width = 0.3)) +
  # geom_line(stat="smooth", method = "loess", aes(y = mean*100), se=FALSE, size=0.4, alpha = 1) +
  # color = NULL stops the markers from inheriting the age colour mapping
  geom_text(data = fig5_signif,
            mapping = aes(x = x, y = y, label = label, color = NULL)) +
  # the error bars use the same filtered and recoded rows as the points
  geom_errorbar(aes(ymin = ci_l * 100, ymax = 100 * ci_u),
                width = 0, position = position_dodge(width = 0.3), alpha = 0.5) +
  facet_wrap(~type) +
  scale_x_continuous(breaks = c(2, 4, 6), labels = c("Lib.", "Mod.", "Con.")) +
  scale_color_manual(name = NULL, values = c("#46ACC8", "darkgreen"),
                     labels = c("Ages 31+", "Ages 18-30")) +
  guides(color = guide_legend(reverse = TRUE)) +
  ylim(0, 100) +
  labs(x = "\nRespondent Ideology", y = "Percent favorability\n") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.margin = margin(0, 0, 0, 0),
        legend.box.margin = margin(-7, -7, -7, -7),
        strip.background = element_blank(),
        axis.ticks.y = element_blank())


## figure 6 ----------------------------------------------------------------

# In-panel text labels for figure 6. dat_text names the panel's own country at
# a hand-picked (x, y) position; dat_text2 places an "Israel" label in every
# panel.
dat_text <- data.frame(
  label = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  country = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  x = c(4.2, 3.5, 4.5, 4.5, 3.6, 4.2),
  y = c(28, 20, 22, 35, 77, 85)
)

dat_text2 <- data.frame(
  label = rep("Israel", times = 6),
  country = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  x = rep(c(6, 2), each = 3),
  y = rep(c(75, 50), each = 3)
)



out_fig6 <- out_fig6 %>% arrange(country)

# Significance markers for figure 6: "**" for p < 0.05, "*" for p < 0.1.
# The facet (country) and ideology position of each marker are taken from the
# same out_fig6 row as its p-value. arrange() sorts the countries
# alphabetically, so a hard-coded Russia/Iran/China/... label vector would
# attach the stars to the wrong panels.
# Note the column naming: `y` holds the ideology level and `x` the constant 0;
# the plot maps them as aes(x = y, y = x).
fig6_signif <- data.frame(
  label = case_when(
    out_fig6[[3]] < 0.05 ~ "**",
    out_fig6[[3]] < 0.1 ~ "*",
    TRUE ~ ""
  ),
  # "russia_culture" -> "Russia", matching the facet labels used in the plot
  country = str_to_sentence(sub("_culture$", "", out_fig6$country)),
  x = 0,
  y = out_fig6$ideology
)




# Figure 6: among under-30 respondents, favorability toward each country's
# culture item (orange, nudged 0.2 to the right) by 7-point ideology, one panel
# per country. The Israel series (blue) has its `country` column removed so it
# is repeated in every panel.
means %>%
  filter(type == "culture", age == "under30", country != "israel") %>%
  mutate(country = factor(str_to_sentence(country),
                          levels = c("Russia", "Iran", "China", "Israel",
                                     "Nigeria", "India", "Mexico"))) %>%
  ggplot() +
  geom_point(aes(x = ideo7 + 0.2, y = mean * 100), color = "#E58601") +
  facet_wrap(. ~ country) +
  # geom_line(aes(x = ideo7, y=mean*100),        stat="smooth", method = "loess", se=FALSE, size=0.4, color="#E58601") +
  geom_errorbar(aes(x = ideo7 + 0.2, ymin = ci_l * 100, ymax = ci_u * 100),
                width = 0, alpha = 0.4, color = "#E58601",
                position = position_dodge(width = 0.3), size = 0.3) +
  geom_point(
    data = means %>%
      filter(type == "culture", age == "under30", country == "israel") %>%
      transform(country = NULL),
    aes(x = ideo7, y = mean * 100),
    colour = "#046C9A"
  ) +
  # geom_line(data=transform(means %>% filter(type == "culture", age == "under30", country == "israel"), country=NULL),aes(x=ideo7, y=mean*100),colour="#046C9A", stat="smooth", method = "loess", se=FALSE, size=0.4) +
  geom_errorbar(
    data = means %>%
      filter(type == "culture", age == "under30", country == "israel") %>%
      transform(country = NULL),
    aes(x = ideo7, ymin = ci_l * 100, ymax = 100 * ci_u),
    width = 0, colour = "#046C9A", alpha = 0.4
  ) +
  theme_bw() +
  labs(x = "\nRespondent ideology",
       y = "Percentage of respondents viewing the\n country's languages, cultures, favorably\n") +
  scale_x_continuous(breaks = c(2, 4, 6), labels = c("Lib.", "Mod.", "Con.")) +
  theme(strip.background = element_blank(),
        axis.ticks.y = element_blank()) +
  # country name labels (orange) and Israel labels (blue)
  geom_text(data = dat_text,
            mapping = aes(x = x, y = y, label = label),
            color = "#E58601") +
  geom_text(data = dat_text2,
            mapping = aes(x = x, y = y, label = label),
            color = "#046C9A") +
  ylim(0, 100) +
  # fig6_signif stores the ideology level in `y` and the constant 0 in `x`
  geom_text(data = fig6_signif,
            mapping = aes(x = y, y = x, label = label))



## figure S1 ---------------------------------------------------------------

# Keep only the columns this figure needs: ids, age, ideology, both survey
# weights, the q2 identity items, and every country favorability rating.
df_3 <- remove_all_labels(full) %>%
  dplyr::select(
    caseid, age, ideo7, weight_age30, weight_overall, starts_with("q2"),
    ends_with("culture"), ends_with("overall"), ends_with("politics")
  )

# rename identity columns
df_3 <- rename(df_3, socialist = q2_mult_1,
               leftist = q2_mult_2, progressive = q2_mult_3,
               libertarian = q2_mult_4, altright = q2_mult_5,
               chcon = q2_mult_6)

identities <- c("leftist", "socialist", "progressive", "libertarian", "altright", "chcon")


# apply weights under 30 (rows without an under-30 weight are dropped first)
u30_iden <- svydesign(ids = ~1, weights = ~weight_age30,
                      data = df_3[!is.na(df_3$weight_age30), ])

# apply weights over 30
o30_iden <- subset(svydesign(ids = ~1, weights = ~weight_overall, data = df_3), age > 30)

# get favorability for each identity option (identifier = 1, non-identifier = 2)
# One weighted cross-tab (normalised to a total of 100) is built per
# rating variable x identity pair, using the under-30 design. The pieces are
# collected in a preallocated list and bound once, instead of growing a data
# frame with rbind() on every iteration.
rating_tables <- vector("list", length(variables) * length(identities))
table_idx <- 0L

for (country in variables) {
  for (identity in identities) {
    table_idx <- table_idx + 1L

    rating_tables[[table_idx]] <- cbind(
      as.data.frame(
        svytable(as.formula(paste0("~", country, "+", identity)),
                 design = u30_iden, Ntotal = 100)
      ),
      variable = country, iden = identity
    ) %>%
      # `country` and `identity` hold column names as strings; all_of() makes
      # that explicit rather than relying on ambiguous bare-name lookup.
      rename(rating = all_of(country), token = all_of(identity))
  }
}

identity_rankings <- do.call(rbind, rating_tables)
rm(rating_tables, table_idx)

# filter to only being identifiers, split condition and country
identity_rankings <- identity_rankings %>%
  filter(token == 1) %>%
  dplyr::select(-token) %>%
  separate(variable, into = c("country", "type"), sep = "_")


identity_rankings$country <- factor(str_to_sentence(identity_rankings$country),
                                    levels = c("Russia", "Iran", "China",
                                               "Israel", "Nigeria", "India", "Mexico"))
# reorder ratings (5 is placed between the favorable and unfavorable codes)
identity_rankings$rating <- factor(identity_rankings$rating,
                                   levels = c("1", "2", "5", "3", "4"))

# chart: stacked bars of the "overall" rating distribution per country, one
# facet per left-leaning identity; rating 5 is excluded and the remaining
# ratings are rescaled to sum to 100 within each country x identity cell

identity_labels <- c(
  leftist = "Leftist",
  socialist = "Socialist",
  progressive = "Progressive",
  libertarian = "Libertarian",
  altright = "Alt-Right",
  chcon = "Christian Conservative"
)

identity_rankings %>%
  filter(
    type == "overall",
    rating != 5,
    iden %in% c("leftist", "socialist", "progressive")
  ) %>%
  group_by(country, iden) %>%
  mutate(freq = Freq * 100 / sum(Freq)) %>%
  ggplot(aes(x = freq, y = country, fill = rating)) +
  geom_col() +
  facet_wrap(~iden, ncol = 1, labeller = labeller(iden = identity_labels)) +
  scale_fill_brewer(
    palette = "YlOrRd",
    name = NULL,
    labels = c("Very favorable", "Somewhat favorable",
               "Somewhat unfavorable", "Very unfavorable"),
    guide = guide_legend(reverse = TRUE)
  ) +
  # reverse the factor order so the first level is drawn at the top
  scale_y_discrete(limits = rev(levels(as.factor(identity_rankings$country)))) +
  coord_cartesian(xlim = c(0, 100)) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line = element_blank(),
    axis.text.y = element_text(margin = margin(r = 0))
  )


## figure S2 ---------------------------------------------------------------

# Build the faceted plot of overall favorability by ideology and age group.
# "x" and "y" are placeholder country levels that match no data; with
# drop = FALSE they still get facet slots, which are stripped from the grob
# below (presumably to leave Israel alone in the middle row -- confirm visually).
fig_s2_plot <- means %>%
  filter(type == "overall") %>%
  mutate(country = factor(
    str_to_sentence(country),
    levels = c("Russia", "Iran", "China", "x",
               "Israel", "y", "Nigeria", "India", "Mexico")
  )) %>%
  ggplot(aes(x = ideo7, y = mean * 100, color = age)) +
  geom_line(stat = "smooth", method = "loess") +
  geom_point(position = position_dodge(width = 0.3)) +
  geom_errorbar(
    aes(ymin = ci_l * 100, ymax = ci_u * 100),
    width = 0, alpha = 0.4,
    position = position_dodge(width = 0.3)
  ) +
  facet_wrap(~country, drop = FALSE) +
  scale_x_continuous(breaks = c(1.5, 4, 6.5), labels = c("Lib.", "Mod.", "Con.")) +
  scale_color_manual(
    name = NULL,
    values = c("#46ACC8", "darkgreen"),
    labels = c("Ages 31+", "Ages 18-30"),
    guide = guide_legend(reverse = TRUE)
  ) +
  coord_cartesian(ylim = c(-5, 105)) +
  labs(
    x = "\nRespondent Ideology",
    y = "Percentage of respondents viewing country favorably\n"
  ) +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    strip.text.x = element_text(size = 12),
    axis.ticks.y = element_blank(),
    axis.title.x = element_text(margin = margin(t = -5)),
    legend.position = "bottom",
    legend.margin = margin(0, 0, 0, 0),
    legend.box.margin = margin(-8, -8, -8, -8)
  )

# convert to a gtable so individual pieces can be deleted
fig_s2_grob <- ggplotGrob(fig_s2_plot)

# flag the panels and strips that must be removed
rm_grobs <- fig_s2_grob$layout$name %in%
  c("panel-2-1", "panel-2-3", "strip-t-1-2", "strip-t-3-2")

# drop them from both the grob list and the layout table
fig_s2_grob$grobs[rm_grobs] <- NULL
fig_s2_grob$layout <- fig_s2_grob$layout[!rm_grobs, ]

## move axis closer to panel
fig_s2_grob$layout[fig_s2_grob$layout$name == "axis-l-2-1", c("l", "r")] <- c(8, 8)
grid.newpage()
grid.draw(fig_s2_grob)

rm(fig_s2_plot, fig_s2_grob)



## figure S3 ---------------------------------------------------------------

# in-panel text labels: one row per facet (country), positioned by hand

# orange label naming each panel's own country
dat_text <- data.frame(
  label   = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  country = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  x       = c(5, 3.2, 5, 5, 3, 3),
  y       = c(7, 25, 25, 20, 82, 94)
)

# blue "Israel" label repeated in every panel
dat_text2 <- data.frame(
  label   = rep("Israel", 6),
  country = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  x       = c(6, 6, 6, 6, 5, 4),
  y       = c(70, 70, 70, 70, 60, 45)
)

# Israel's over-30 culture series; the facet variable is dropped so the same
# points, smooth and error bars are overlaid on every country's panel
israel_culture_over30 <- means %>%
  filter(type == "culture", age == "over30", country == "israel") %>%
  transform(country = NULL)

means %>%
  filter(type == "culture", age == "over30", country != "israel") %>%
  mutate(country = factor(
    str_to_sentence(country),
    levels = c("Russia", "Iran", "China", "Israel", "Nigeria", "India", "Mexico")
  )) %>%
  ggplot() +
  facet_wrap(. ~ country) +
  # comparison country (orange)
  geom_point(aes(x = ideo7, y = mean * 100), color = "#E58601") +
  geom_line(
    aes(x = ideo7, y = mean * 100),
    stat = "smooth", method = "loess", se = FALSE, size = 0.4, color = "#E58601"
  ) +
  # Israel reference series (blue)
  geom_point(
    data = israel_culture_over30,
    aes(x = ideo7, y = mean * 100), colour = "#046C9A"
  ) +
  geom_line(
    data = israel_culture_over30,
    aes(x = ideo7, y = mean * 100), colour = "#046C9A",
    stat = "smooth", method = "loess", se = FALSE, size = 0.4
  ) +
  geom_errorbar(
    data = israel_culture_over30,
    aes(x = ideo7, ymin = ci_l * 100, ymax = 100 * ci_u),
    width = 0, colour = "#046C9A", alpha = 0.4
  ) +
  geom_text(
    data = dat_text,
    mapping = aes(x = x, y = y, label = label),
    color = "#E58601"
  ) +
  geom_text(
    data = dat_text2,
    mapping = aes(x = x, y = y, label = label),
    color = "#046C9A"
  ) +
  scale_x_continuous(breaks = c(2, 4, 6), labels = c("Lib.", "Mod.", "Con.")) +
  ylim(0, 100) +
  labs(
    x = "\nRespondent ideology",
    y = "Percentage of respondents viewing the\n country's languages, cultures, favorably\n"
  ) +
  theme_bw() +
  theme(strip.background = element_blank(),
        axis.ticks.y = element_blank())

rm(dat_text, dat_text2, israel_culture_over30)


## figure S4 ---------------------------------------------------------------

# in-panel text labels for the politics chart: one row per facet (country)

# orange label naming each panel's own country
dat_text <- data.frame(
  label   = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  country = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  x       = c(4.5, 3, 4.8, 5, 3.6, 2.7),
  y       = c(18, 5, 18, 25, 68, 70)
)

# blue "Israel" label repeated in every panel
dat_text2 <- data.frame(
  label   = rep("Israel", 6),
  country = c("Russia", "Iran", "China", "Nigeria", "India", "Mexico"),
  x       = c(5, 5, 5, 3, 3, 3),
  y       = c(70, 70, 70, 15, 15, 15)
)

# Israel's under-30 politics series; the facet variable is dropped so the
# same layers are overlaid on every country's panel
israel_politics_under30 <- means %>%
  filter(type == "politics", age == "under30", country == "israel") %>%
  transform(country = NULL)

means %>%
  filter(type == "politics", age == "under30", country != "israel") %>%
  mutate(country = factor(
    str_to_sentence(country),
    levels = c("Russia", "Iran", "China", "Israel", "Nigeria", "India", "Mexico")
  )) %>%
  ggplot() +
  facet_wrap(. ~ country) +
  # comparison country (orange)
  geom_point(aes(x = ideo7, y = mean * 100), color = "#E58601") +
  geom_line(
    aes(x = ideo7, y = mean * 100),
    stat = "smooth", method = "loess", se = FALSE, size = 0.4, color = "#E58601"
  ) +
  # Israel reference series (blue)
  geom_point(
    data = israel_politics_under30,
    aes(x = ideo7, y = mean * 100), colour = "#046C9A"
  ) +
  geom_line(
    data = israel_politics_under30,
    aes(x = ideo7, y = mean * 100), colour = "#046C9A",
    stat = "smooth", method = "loess", se = FALSE, size = 0.4
  ) +
  geom_errorbar(
    data = israel_politics_under30,
    aes(x = ideo7, ymin = ci_l * 100, ymax = 100 * ci_u),
    width = 0, colour = "#046C9A", alpha = 0.4
  ) +
  geom_text(
    data = dat_text,
    mapping = aes(x = x, y = y, label = label),
    color = "#E58601"
  ) +
  geom_text(
    data = dat_text2,
    mapping = aes(x = x, y = y, label = label),
    color = "#046C9A"
  ) +
  scale_x_continuous(breaks = c(2, 4, 6), labels = c("Lib.", "Mod.", "Con.")) +
  ylim(0, 100) +
  labs(
    x = "\nRespondent ideology",
    y = "Percentage of respondents viewing the\n country's politics and government favorably\n"
  ) +
  theme_bw() +
  theme(strip.background = element_blank(),
        axis.ticks.y = element_blank())

rm(israel_politics_under30)


# regressions -------------------------------------------------------------



## table 2 -----------------------------------------------------------------

# look up each respondent's state in pct_jew_pop and attach its pct_jewish
data$pct_jew <- pct_jew_pop$pct_jewish[
  match(data$inputstate, pct_jew_pop$inputstate)
]

# treatment arm as a factor
data$Q5_7_treat <- as.factor(data$Q5_7_treat)

# single Israel outcome: row-wise sum of the three Israel items, ignoring NAs
# NOTE(review): with na.rm = TRUE a row whose three items are all NA becomes 0
# rather than NA -- confirm that is the intended coding.
israel_items <- c("israel_overall", "israel_politics", "israel_culture")
data$israel <- rowSums(data[, israel_items], na.rm = TRUE)
rm(israel_items)

# college enrollment from q1: 2 or 3 -> 4-year college, 1 -> no,
# anything else -> other college; a missing q1 stays missing
data$college <- factor(
  case_when(
    is.na(data$q1) ~ NA_character_,
    data$q1 %in% c(2, 3) ~ "4-year college",
    data$q1 == 1 ~ "no",
    TRUE ~ "Other college"
  ),
  levels = c("no", "4-year college", "Other college")
)

# collapsed race: codes 1-3 keep their own category, everything else
# (including missing) falls into "Other race"
data$race_simp <- factor(
  case_when(
    data$race == 1 ~ "White",
    data$race == 2 ~ "Black",
    data$race == 3 ~ "Latino",
    TRUE ~ "Other race"
  ),
  levels = c("White", "Black", "Latino", "Other race")
)


# rename identity columns
data <- rename(data, socialist = q2_mult_1,
               leftist = q2_mult_2, progressive = q2_mult_3,
               libertarian = q2_mult_4, altright = q2_mult_5,
               chcon = q2_mult_6)


# Recode identities so it's 1 if the respondent identifies as such and 0
# otherwise (missing stays missing). Columns are addressed by name rather than
# by hard-coded position (previously 10:15), so the recode keeps hitting the
# identity items even if upstream cleaning adds, drops or reorders columns.
for (identity_col in c("socialist", "leftist", "progressive",
                       "libertarian", "altright", "chcon")) {
  data[[identity_col]] <- ifelse(data[[identity_col]] == 1, 1, 0)
}



# survey design for the over-30 sample: overall weights, then keep age > 30
over30_reg <- subset(
  svydesign(ids = ~1, weights = ~weight_overall, data = data),
  age > 30
)

# survey design for the under-30 sample: only rows that carry an under-30 weight
under30_reg <- svydesign(
  ids = ~1,
  weights = ~weight_age30,
  data = data[!is.na(data$weight_age30), ]
)


# run OLS for people in "overall" condition (Q5_7_treat == 3);
# the under-30 model additionally controls for college enrollment

u30ols <- svyglm(
  israel ~ ideo7 + pct_jew + race_simp + college + age + as.factor(gender),
  design = subset(under30_reg, Q5_7_treat == 3)
)

o30ols <- svyglm(
  israel ~ ideo7 + pct_jew + race_simp + age + as.factor(gender),
  design = subset(over30_reg, Q5_7_treat == 3)
)

# print both out
stargazer(u30ols, o30ols, no.space = TRUE)

## table 2 w logit -----------------------------------------------------------------

# fit logit models: same specifications as the OLS models, restricted to
# respondents with Q5_7_treat == 3, with a quasibinomial family

o30logit <- svyglm(israel ~ ideo7 + pct_jew + race_simp +
                     age + as.factor(gender),
                   design = subset(over30_reg, Q5_7_treat == 3),
                   family = quasibinomial)

u30logit <- svyglm(israel ~ ideo7 + pct_jew + race_simp +
                     college +
                     age + as.factor(gender),
                   design = subset(under30_reg, Q5_7_treat == 3),
                   family = quasibinomial)

stargazer(u30logit, o30logit, no.space = TRUE)


# get marginal effects: each coefficient scaled by the mean logistic density
# evaluated at the model's predictions (predict()'s default scale --
# presumably the link scale; confirm against the survey docs)

log.marg.u30 <- coef(u30logit) * mean(dlogis(predict(u30logit)), na.rm = TRUE)
log.marg.o30 <- coef(o30logit) * mean(dlogis(predict(o30logit)), na.rm = TRUE)

# Stack both models on the under-30 term list. Over-30 effects are looked up
# by term name, so terms that model lacks (the college dummies) become NA
# without depending on hard-coded coefficient positions.
effects <- data.frame(
  effect = c(unname(log.marg.u30),
             unname(log.marg.o30[names(log.marg.u30)])),
  term = rep(names(log.marg.u30), 2),
  model = rep(c("under 30", "over30"), each = length(log.marg.u30))
)

effects %>%
  ggplot(aes(x = term, y = effect)) +
  geom_point(aes(color = model, group = model, shape = model), size = 2) +
  ylab("Marginal Effect") +
  xlab("Model Term") +
  geom_abline(intercept = 0, slope = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle(label = "Comparison of marginal effects in logit models by age")


# Predicted probability of viewing Israel favorably across ideology, from both
# logit models, with pct_jew held at 2. `terms` is spelled out in full rather
# than relying on partial argument matching of `term`.
rbind(
  cbind(ggpredict(o30logit, terms = "ideo7", condition = c(pct_jew = 2)),
        age = "over30"),
  cbind(ggpredict(u30logit, terms = "ideo7", condition = c(pct_jew = 2)),
        age = "under30")
) %>%
  ggplot(aes(x = x, y = predicted, color = age)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, size = 0.3) +
  ylim(0, 1) +
  xlab("\nRespondent Ideology") +
  ylab("Predicted probability of viewing Israel favorably") +
  scale_x_continuous(breaks = c(2, 4, 6),
                     labels = c("Liberal", "Moderate", "Conservative")) +
  scale_color_manual(name = NULL, values = c("#46ACC8", "darkgreen"),
                     labels = c("Ages 31+", "Ages 18-30"),
                     guide = guide_legend(reverse = TRUE)) +
  ggtitle("") +
  theme_bw() +
  theme(legend.position = "bottom", legend.margin = margin(0, 0, 0, 0),
        legend.box.margin = margin(-8, -8, -8, -8),
        axis.title.x = element_text(margin = margin(t = -5)))
  
  


## table 3 -----------------------------------------------------------------

# OLS on the Israel/Palestine issue ranking, using the survey designs built
# earlier for the issue rankings; the under-30 model also controls for college

table3_under30 <- svyglm(
  israel_palest ~ ideo7 + pct_jew + race_simp + college + age + as.factor(gender),
  design = under30_issue
)

table3_over30 <- svyglm(
  israel_palest ~ ideo7 + pct_jew + race_simp + age + as.factor(gender),
  design = over30_issue
)

stargazer(table3_under30, table3_over30, no.space = TRUE)

rm(table3_under30, table3_over30)



